Exploratory Data Analysis for the Crime Forecasting Challenge
# Load the raw incident data. The file is semicolon-delimited with a header
# row; strings are kept as character vectors rather than converted to factors.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, which would silently change these arguments.
d <- read.csv(
  file = "data/mp_data.csv",
  header = TRUE,
  sep = ";",
  stringsAsFactors = FALSE
)
# Keep the untouched import available as `d`; all cleaning happens on `data`.
data <- d
# Convert occ_date from character (day/month/year) to POSIXct.
# Values that do not match the format are returned as NA.
data$occ_date <- as.POSIXct(data$occ_date, format = "%d/%m/%Y")
# Drop every row that has an NA in any column, which includes rows whose
# occ_date could not be parsed above.
data <- na.omit(data)
## 'data.frame': 824247 obs. of 10 variables:
## $ CATEGORY : chr "STREET CRIMES" "STREET CRIMES" "STREET CRIMES" "STREET CRIMES" ...
## $ CALL.GROUPS : chr "DISORDER" "DISORDER" "DISORDER" "DISORDER" ...
## $ final_case_type: chr "DISTP " "DISTP " "DISTP " "DISTP " ...
## $ CASE.DESC : chr "DISTURBANCE - PRIORITY " "DISTURBANCE - PRIORITY " "DISTURBANCE - PRIORITY " "DISTURBANCE - PRIORITY " ...
## $ occ_date : POSIXct, format: "2012-03-01" "2012-03-01" ...
## $ x_coordinate : int 7641076 7642640 7643599 7644359 7644771 7650214 7653737 7666126 7673214 7679775 ...
## $ y_coordinate : int 684831 683167 683216 693642 683859 692359 698495 671764 671625 678272 ...
## $ census_tract : int 4900 10600 10600 3502 10600 2401 3200 702 8302 9201 ...
## $ DISTRICT : int 810 842 842 590 842 660 620 922 971 952 ...
## $ PRECINCT : chr "CE" "CE" "CE" "NO" ...
## CATEGORY CALL.GROUPS final_case_type
## 1 STREET CRIMES DISORDER DISTP
## 2 STREET CRIMES DISORDER DISTP
## 3 STREET CRIMES DISORDER DISTP
## 4 STREET CRIMES DISORDER DISTP
## 5 STREET CRIMES DISORDER DISTP
## 6 STREET CRIMES DISORDER DISTP
## CASE.DESC occ_date
## 1 DISTURBANCE - PRIORITY 2012-03-01
## 2 DISTURBANCE - PRIORITY 2012-03-01
## 3 DISTURBANCE - PRIORITY 2012-03-01
## 4 DISTURBANCE - PRIORITY 2012-03-01
## 5 DISTURBANCE - PRIORITY 2012-03-01
## 6 DISTURBANCE - PRIORITY 2012-03-01
## x_coordinate y_coordinate census_tract DISTRICT PRECINCT
## 1 7641076 684831 4900 810 CE
## 2 7642640 683167 10600 842 CE
## 3 7643599 683216 10600 842 CE
## 4 7644359 693642 3502 590 NO
## 5 7644771 683859 10600 842 CE
## 6 7650214 692359 2401 660 NO
## [1] 100 2303 4900 8202 980000
## CATEGORY CALL.GROUPS final_case_type
## Length:824247 Length:824247 Length:824247
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## CASE.DESC occ_date x_coordinate
## Length:824247 Min. :2012-03-01 00:00:00 Min. :7604367
## Class :character 1st Qu.:2013-06-23 00:00:00 1st Qu.:7644078
## Mode :character Median :2014-08-30 00:00:00 Median :7655131
## Mean :2014-08-01 17:33:06 Mean :7657912
## 3rd Qu.:2015-09-20 00:00:00 3rd Qu.:7671668
## Max. :2016-09-30 00:00:00 Max. :7711207
## y_coordinate census_tract DISTRICT PRECINCT
## Min. :642230 Min. : 100 Min. :510.0 Length:824247
## 1st Qu.:676050 1st Qu.: 2303 1st Qu.:670.0 Class :character
## Median :683582 Median : 4900 Median :841.0 Mode :character
## Mean :684385 Mean : 6324 Mean :796.7
## 3rd Qu.:690689 3rd Qu.: 8202 3rd Qu.:932.0
## Max. :732085 Max. :980000 Max. :990.0
## [1] "STREET CRIMES" "OTHER" "MOTOR VEHICLE THEFT"
## [4] "BURGLARY"
## [1] "DISORDER" "PERSON CRIME" " SUSPICIOUS"
## [4] "NON CRIMINAL/ADMIN" "PROPERTY CRIME" "TRAFFIC"
## [1] "DISTP " "DISTW " "VICE " "ASSLTP" "ASSLTW" "ROBP " "ROBW "
## [8] "SHOOTW" "SHOTS " "STABW " "THRETP" "THRETW" "GANG " "AREACK"
## [15] "PREMCK" "SUSP " "SUSPP " "SUSPW " "ANIML " "ANIMLP" "BOMBTH"
## [22] "CHEMTH" "DIST " "ESCAPE" "FWB " "FWH " "FWI " "FWN "
## [29] "NOISE " "CHEM " "PARK " "PARTY " "POLINV" "SCHL " "SCHLP "
## [36] "THRET " "TMET " "TMETP " "TRASH " "TRASHP" "UNWNT " "TMETW "
## [43] "UNWNTP" "UNWNTW" "W26 " "ASSIST" "77" "CIVIL " "EVICT "
## [50] "FOLLOW" "MSG " "FLAG " "PROP " "RED " "RIVPOL" "SEIZE "
## [57] "SERVE " "STNDBY" "TRANS " "WARR " "WARRC " "WELCK " "SUBSTP"
## [64] "WELCKP" "ASSLT " "DEVICE" "ROB " "SHOOT " "STAB " "BURG "
## [71] "FRAUD " "FRAUDP" "THEFT " "IDENT " "THEFTC" "THEFTP" "VAND "
## [78] "VANDP " "VEHST " "ACCHR " "ACCHRP" "ACCINJ" "ACCNON" "ACCUNK"
## [85] "DUII " "HAZARD" "TRASTP" "WRONG " "FPURS " "TPURS " "VEHREC"
## [92] "VEHSTP" "PROWLP" "BURGP " "SCHLW " "RSTLN " "ZERO " "GREAT "
## [99] "SCHLET" "HOSTGE"
## [1] "DISTURBANCE - PRIORITY "
## [2] "DISTURBANCE - WITH WEAPON *H "
## [3] "VICE-DRUGS, LIQUOR, PROSTITUTION, GAMBLING "
## [4] "ASSAULT - PRIORITY "
## [5] "ASSAULT - WITH WEAPON *H "
## [6] "ROBBERY - PRIORITY *H "
## [7] "ROBBERY - WITH WEAPON *H "
## [8] "SHOOTING - WITH WEAPON *H "
## [9] "SHOTS FIRED "
## [10] "STABBING - WITH WEAPON *H "
## [11] "THREAT - PRIORITY "
## [12] "THREAT - WITH WEAPON *H "
## [13] "GANG RELATED "
## [14] "AREA CHECK "
## [15] "PREMISE CHECK "
## [16] "SUSPICIOUS SUBJ, VEH, OR CIRCUMSTANCE "
## [17] "SUSPICIOUS - PRIORITY "
## [18] "SUSPICIOUS - WITH WEAPON *H "
## [19] "ANIMAL PROBLEM "
## [20] "ANIMAL PROBLEM - PRIORITY "
## [21] "BOMB - THREAT (33B) "
## [22] "CHEMICAL OR BIOLOGICAL THREAT (33CTH) "
## [23] "DISTURBANCE - COLD "
## [24] "ESCAPE FROM CUSTODY "
## [25] "FIREWORKS - NOISE (BROADCAST ONLY) "
## [26] "FIREWORKS - HAZARD "
## [27] "FIREWORKS - ILLEGAL "
## [28] "FIREWORKS - NOISE (MDC DISPATCH) "
## [29] "NOISE DISTURBANCE "
## [30] "CHEMICAL OR BIOLOGICAL (33C) "
## [31] "PARKING PROBLEM "
## [32] "PARTY DISTURBANCE "
## [33] "BOMB OR CHEM POLICE INVESTIGATION (33B/33C) "
## [34] "SCHOOL INCIDENT - COLD "
## [35] "SCHOOL INCIDENT - PRIORITY "
## [36] "THREAT - COLD "
## [37] "TRIMET INCIDENT - COLD "
## [38] "TRIMET INCIDENT - PRIORITY "
## [39] "ILLEGAL DUMPING - COLD "
## [40] "ILLEGAL DUMPING - PRIORITY "
## [41] "UNWANTED PERSON "
## [42] "TRIMET INCIDENT - WITH WEAPON *H "
## [43] "UNWANTED PERSON - PRIORITY "
## [44] "UNWANTED PERSON - WITH WEAPON *H "
## [45] "DETOX TRANSPORT "
## [46] "ASSIST - CITIZEN OR AGENCY "
## [47] "SUBJECT STOP - SDC "
## [48] "CIVIL - CIVIL PROBLEM "
## [49] "CIVIL - EVICTION "
## [50] "FOLLOW-UP "
## [51] "DELIVER MESSAGE "
## [52] "FLAGDOWN "
## [53] "PROPERTY LOST, FOUND, RECOVERED "
## [54] "ASSISTANCE - FIRE / EMS NEED POLICE *H "
## [55] "RIVER - MARINE INCIDENT "
## [56] "CIVIL - PROPERTY SEIZURE "
## [57] "CIVIL - SERVE PAPERS "
## [58] "CIVIL - STANDBY "
## [59] "TRANSPORT "
## [60] "WARRANT "
## [61] "WARRANT - WALK-IN / COUNTER "
## [62] "WELFARE CHECK - COLD "
## [63] "PERSON CONTACT (86) "
## [64] "WELFARE CHECK - PRIORITY "
## [65] "ASSAULT - COLD "
## [66] "BOMB - DEVICE DISCOVERED (33B) *H "
## [67] "ROBBERY - COLD "
## [68] "SHOOTING - COLD "
## [69] "STABBING - COLD "
## [70] "BURGLARY - COLD "
## [71] "FRAUD - COLD "
## [72] "FRAUD - PRIORITY "
## [73] "THEFT - COLD "
## [74] "IDENTITY THEFT "
## [75] "THEFT - SUBJECT IN CUSTODY "
## [76] "THEFT - PRIORITY "
## [77] "VANDALISM - COLD "
## [78] "VANDALISM - PRIORITY "
## [79] "VEHICLE STOLEN - COLD "
## [80] "ACCIDENT - HIT AND RUN - COLD "
## [81] "ACCIDENT - HIT & RUN - PRIORITY "
## [82] "ACCIDENT - INJURY "
## [83] "ACCIDENT - NON INJURY "
## [84] "ACCIDENT - UNKNOWN INJURY "
## [85] "DRIVING UNDER INFLUENCE "
## [86] "HAZARD - HAZARDOUS CONDITION "
## [87] "TRAFFIC STOP "
## [88] "HAZARD - WRONG-WAY DRIVER *H "
## [89] "FOOT PURSUIT *H "
## [90] "TRAFFIC PURSUIT *H "
## [91] "VEHICLE RECOVERED "
## [92] "VEHICLE STOLEN - PRIORITY "
## [93] "PROWLER "
## [94] "BURGLARY - PRIORITY *H "
## [95] "SCHOOL INCIDENT - WITH WEAPON *H "
## [96] "ROLLING STOLEN *H "
## [97] "ASSISTANCE - RESPONDER EMERGENCY *H "
## [98] "GREAT - SRO INITIATED ACTIVITY "
## [99] "SCHOOL EVENTS "
## [100] "HOSTAGE SITUATION *H "
## [1] 4900 10600 3502 2401 3200 702 8302 9201 9701 9804
## [11] 4101 3501 2203 200 2902 8902 601 8100 10200 4002
## [21] 5800 3401 3302 1801 501 602 8202 9502 3803 3801
## [31] 5100 3301 1101 3601 301 701 7700 9000 9102 9301
## [41] 6404 4001 3901 3902 1102 1302 7900 8400 9101 6502
## [51] 3802 5900 802 2303 2100 402 7800 7300 8002 801
## [61] 2903 9202 6403 3702 1400 2701 1602 9302 9501 9702
## [71] 5000 7202 901 2000 302 6100 4800 8201 1201 1702
## [81] 4500 7500 6200 5700 2502 3602 5200 100 2501 8800
## [91] 4102 3701 2901 8301 1701 4200 2402 7400 7600 9400
## [101] 4700 6602 5600 3402 3100 2702 8600 8500 4300 6702
## [111] 401 5500 502 7201 2801 1301 8901 6701 1601 902
## [121] 8001 9803 1000 7000 1900 2802 8700 6001 2600 4602
## [131] 3000 1202 4601 30502 9903 6300 6501 1500 1802 6801
## [141] 6402 30600 20900 3603 22206 6802 980000 6002 6601 6900
## [151] 30102 30402 20800 22208 21802 20100
Category’s percentage:
Call description’s percentage:
Final case type’s percentage:
Census tract’s percentage:
District’s percentage:
Date’s percentage:
District Density Analysis:
## NULL
District vs Category:
District vs Final Case Type:
Load data:
Date vs Category:
Date vs District: